In [1]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA as PCA
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.sparse.csgraph import connected_components
import umap # pip install umap-learn
from MulticoreTSNE import MulticoreTSNE as TSNE
import scipy.spatial
import sklearn
import sklearn.neighbors
import sklearn.manifold
import plotly
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode
import plotly.express as px
init_notebook_mode()

np.random.seed(seed=1234)

PKD1

In [2]:
# Load the PKD1 inputs: sample ids, the additive genotype matrix and the
# per-sample label table.
# NOTE(review): `id` shadows the Python builtin and is not used by the cells
# visible here; it is kept in case another cell reads it.
id = pd.read_table('pkd1.fam', sep=' ', header=None, usecols=[0])

# Index the genotype table by FID and strip the non-genotype columns so that
# only variant columns remain as features.
# PLINK .raw exports normally also carry a PHENOTYPE column; it is dropped when
# present so it cannot leak into the projections as a pseudo-genotype
# (errors='ignore' keeps this a no-op for files that lack it).
X = (
    pd.read_table('pkd1.pruned.raw', sep=' ')
    .set_index(['FID'])
    .drop(['IID', 'PAT', 'MAT', 'SEX'], axis=1)
    .drop(columns=['PHENOTYPE'], errors='ignore')
)

label = pd.read_table("labels", sep='\t', low_memory=False)

# Select the genotype rows in the order of label['sample'] (a sample id absent
# from X raises KeyError), replace missing values with 0 and cast to float64.
X = X.loc[label['sample']].fillna(0).astype("float64")
In [7]:
def my_scatter(title_, np_data, label):
    """Render an interactive 2-D scatter plot of an embedding.

    Points are coloured by the "super_pop" column of `label` and drawn with a
    marker symbol chosen from the "pop" column.

    Parameters
    ----------
    title_ : str
        Title shown above the figure.
    np_data : array-like with two columns
        Embedding coordinates, one row per sample.
    label : pandas.DataFrame
        Per-sample annotations; must provide "super_pop" and "pop" columns and
        have one row per row of `np_data`, in the same order.

    Raises
    ------
    ValueError
        If `np_data` and `label` do not have the same number of rows.

    Returns
    -------
    None
        The figure is displayed through `iplot`; nothing is returned, so a
        trailing call in a cell does not render the figure a second time.
    """
    # Marker symbol per population code; codes not listed here fall back to
    # plotly's own symbol assignment.
    symbol_dict = {"GBR": "circle", "FIN": "square", "CEU": "cross", "TSI": "x"
                  , "CHS": "circle", "CHB": "square", "JPT": "cross"
                  , "PUR": "circle", "MXL":  "square"
                  , "YRI": "circle", "LWK": "square", "ASW": "cross"}

    coords = pd.DataFrame(np_data, columns=["Dim 1", "Dim 2"])
    if len(coords) != len(label):
        raise ValueError(
            "np_data has %d rows but label has %d rows" % (len(coords), len(label))
        )

    # concat(axis=1) aligns on index labels, so pair the rows by position:
    # otherwise a label frame with a non-default index (filtered, re-sorted)
    # would silently yield NaN-padded or mismatched rows.
    df = pd.concat(
        [coords.reset_index(drop=True), label.reset_index(drop=True)], axis=1
    )
    fig = px.scatter(df, x="Dim 1", y="Dim 2", color="super_pop", symbol="pop", symbol_map=symbol_dict, title=title_)
    iplot(fig)
    
# Project the PKD1 genotypes onto the first two principal components.
# random_state is pinned, matching the t-SNE and UMAP cells, so that the
# result does not depend on the global NumPy RNG state (and therefore on cell
# execution order) when the randomized SVD solver is selected.
proj = PCA(n_components=2, random_state=1234).fit_transform(X)
my_scatter("2PCA", proj, label)
In [8]:
# Compute a t-SNE embedding of X with a fixed random_state, then plot it
# against the sample labels.
proj = TSNE(random_state=1234).fit_transform(X)
my_scatter(title_="2t-SNE", np_data=proj, label=label)
In [9]:
# Compute a UMAP embedding of X with a fixed random_state, then plot it
# against the sample labels.
proj = umap.UMAP(random_state=1234).fit_transform(X)
my_scatter(title_="UMAP", np_data=proj, label=label)

PKD2

In [10]:
# Load the PKD2 inputs: sample ids, the additive genotype matrix and the
# per-sample label table.
# NOTE(review): `id` shadows the Python builtin and is not used by the cells
# visible here; it is kept in case another cell reads it.
id = pd.read_table('pkd2.fam', sep=' ', header=None, usecols=[0])

# Index the genotype table by FID and strip the non-genotype columns so that
# only variant columns remain as features.
# PLINK .raw exports normally also carry a PHENOTYPE column; it is dropped when
# present so it cannot leak into the projections as a pseudo-genotype
# (errors='ignore' keeps this a no-op for files that lack it).
X = (
    pd.read_table('pkd2.pruned.raw', sep=' ')
    .set_index(['FID'])
    .drop(['IID', 'PAT', 'MAT', 'SEX'], axis=1)
    .drop(columns=['PHENOTYPE'], errors='ignore')
)

label = pd.read_table("labels", sep='\t', low_memory=False)

# Select the genotype rows in the order of label['sample'] (a sample id absent
# from X raises KeyError), replace missing values with 0 and cast to float64.
X = X.loc[label['sample']].fillna(0).astype("float64")
In [11]:
    
# Project the PKD2 genotypes onto the first two principal components.
# random_state is pinned, matching the t-SNE and UMAP cells: by this point the
# global NumPy RNG has already been consumed by earlier cells, so an unseeded
# randomized SVD solver would give results that depend on execution history.
proj = PCA(n_components=2, random_state=1234).fit_transform(X)
my_scatter("2PCA", proj, label)
In [12]:
# Compute a t-SNE embedding of the PKD2 matrix X with a fixed random_state,
# then plot it against the sample labels.
proj = TSNE(random_state=1234).fit_transform(X)
my_scatter(title_="2t-SNE", np_data=proj, label=label)
In [13]:
# Compute a UMAP embedding of the PKD2 matrix X with a fixed random_state,
# then plot it against the sample labels.
proj = umap.UMAP(random_state=1234).fit_transform(X)
my_scatter(title_="UMAP", np_data=proj, label=label)
In [ ]: